Add 32-bit PAE guest support to the VMX shadow code. The PAE VMX guest now supports the NX bit and can complete a kernel build successfully.
Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Xiaohui Xin <xiaohui.xin@intel.com>
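
The mechanical core of the change is widening the shadow-status hash key
to 64 bits so that a 32-bit PAE guest's CR3 index (the PDPT may sit at any
32-byte-aligned slot within a page) disambiguates top-level shadows that
share a gpfn. A minimal standalone sketch of the encoding, for illustration
only: cr3_idxval/make_key are names invented here, and the constants mirror
get_cr3_idxval()/index_to_key() in the hunks below.

    /* Sketch, not part of the patch: how the 64-bit key is composed. */
    typedef unsigned long long shadow_key_t;

    #define PAE_CR3_ALIGN    5     /* PAE CR3 is 32-byte aligned       */
    #define PAE_CR3_IDX_MASK 0x7f  /* 128 possible PDPT slots per page */

    /* Which 32-byte slot within its page holds the guest PDPT? */
    static unsigned long long cr3_idxval(unsigned long long guest_cr3)
    {
        return (guest_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
    }

    /* gpfn|type in the low bits, CR3 index in bits [63:32], so two
     * PDPTs sharing a page hash to distinct shadow-status entries. */
    static shadow_key_t make_key(unsigned long gpfn_and_type,
                                 unsigned long long idx)
    {
        return (shadow_key_t)gpfn_and_type | (idx << 32);
    }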
obj-y += x86_emulate.o
ifneq ($(pae),n)
-obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o
+obj-$(x86_32) += shadow.o shadow_public.o shadow_guest32.o shadow_guest32pae.o
else
obj-$(x86_32) += shadow32.o
endif
d->domain_id, page_to_mfn(page),
page->u.inuse.type_info,
page->count_info);
- printk("a->gpfn_and_flags=%p\n",
- (void *)a->gpfn_and_flags);
+ printk("a->gpfn_and_flags=%"PRIx64"\n",
+ (u64)a->gpfn_and_flags);
errors++;
}
break;
if ( vmx_pgbit_test(v) )
{
/* The guest is a 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
unsigned long mfn, old_base_mfn;
if( !shadow_set_guest_paging_levels(v->domain, PAGING_L3) )
else
{
/* The guest is a 64-bit or 32-bit PAE guest. */
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
if ( (v->domain->arch.ops != NULL) &&
v->domain->arch.ops->guest_paging_levels == PAGING_L2)
{
domain_crash_synchronous();
}
}
- else
- {
- if ( !shadow_set_guest_paging_levels(v->domain,
- PAGING_L4) )
- {
- printk("Unsupported guest paging levels\n");
- domain_crash_synchronous();
- }
- }
#endif
}
}
unsigned long smfn, real_gpfn;
int pin = 0;
void *l1, *lp;
+ u64 index = 0;
// Currently, we only keep pre-zero'ed pages around for use as L1's...
// This will change. Soon.
if ( d->arch.ops->guest_paging_levels == PAGING_L2 )
pin = 1;
#endif
+
+#if CONFIG_PAGING_LEVELS == 3 && defined (GUEST_32PAE)
+ /*
+  * We use PGT_l4_shadow for the top-level shadow of 3-level (PAE)
+  * paging guests on a PAE host.
+  */
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+ pin = 1;
+#endif
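+ /* Record which 32-byte PDPT slot this top-level shadow is for. */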
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+ index = get_cr3_idxval(current);
break;
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
case PGT_fl1_shadow:
perfc_incr(shadow_l1_pages);
d->arch.shadow_page_count++;
//
ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
- set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+ set_shadow_status(d, gpfn, gmfn, smfn, psh_type, index);
if ( pin )
shadow_pin(smfn);
prediction = (prediction & PGT_mfn_mask) | score;
//printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
- set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0);
if ( create )
perfc_incr(writable_pte_predictions);
//printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
if ( score )
- set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0);
else
{
- delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+ delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred, 0);
perfc_decr(writable_pte_predictions);
}
}
int is_l1_shadow =
((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
PGT_l1_shadow);
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
is_l1_shadow |=
((mfn_to_page(pt_mfn)->u.inuse.type_info & PGT_type_mask) ==
PGT_fl1_shadow);
while ( a && a->gpfn_and_flags )
{
if ( (a->gpfn_and_flags & PGT_type_mask) == PGT_l1_shadow
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
|| (a->gpfn_and_flags & PGT_type_mask) == PGT_fl1_shadow
#endif
)
continue;
idx = get_cr3_idxval(v);
- smfn = __shadow_status(
- d, ((unsigned long)(idx << PGT_pae_idx_shift) | entry->gpfn), PGT_l4_shadow);
+
+ smfn = __shadow_status(d, entry->gpfn, PGT_l4_shadow);
if ( !smfn )
continue;
{
int error;
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
unsigned long gpfn;
gpfn = guest_l1e_get_paddr(guest1[i]) >> PAGE_SHIFT;
v->arch.guest_vtable = map_domain_page_global(gmfn);
}
-#if CONFIG_PAGING_LEVELS >= 3
- /*
- * Handle 32-bit PAE enabled guest
- */
- if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- u32 index = get_cr3_idxval(v);
- gpfn = ((unsigned long)index << PGT_pae_idx_shift) | gpfn;
- }
-#endif
-
/*
* arch.shadow_table
*/
}
else
#endif
+
+#if CONFIG_PAGING_LEVELS == 3 && defined (GUEST_32PAE)
+ /*
+  * We use PGT_l4_shadow for the top-level shadow of 3-level (PAE)
+  * paging guests on a PAE host.
+  */
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
+ {
+ if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_l4_shadow))) )
+ smfn = shadow_l3_table(v, gpfn, gmfn);
+ else
+ {
+ update_top_level_shadow(v, smfn);
+ need_sync = 1;
+ }
+ }
+ else
+#endif
if ( unlikely(!(smfn = __shadow_status(d, gpfn, PGT_base_page_table))) )
{
#if CONFIG_PAGING_LEVELS == 2
return smfn;
}
+
+static inline unsigned long init_l3(
+ struct vcpu *v, unsigned long gpfn, unsigned long gmfn)
+{
+ unsigned long smfn;
+ l4_pgentry_t *spl4e;
+ unsigned long index;
+
+ if ( unlikely(!(smfn = alloc_shadow_page(v->domain, gpfn, gmfn, PGT_l4_shadow))) )
+ {
+ printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
+ BUG(); /* XXX Deal gracefully with failure. */
+ }
+
+ /* The shadow L4 and L3 share the same page. */
+ spl4e = (l4_pgentry_t *)map_domain_page(smfn);
+
+ /*
+ * The shadow L4's pfn_info->tlbflush_timestamp
+ * also records this shadow's own CR3 index.
+ */
+
+ index = get_cr3_idxval(v);
+ frame_table[smfn].tlbflush_timestamp = index;
+
+ memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
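+ /* Install the self entry. */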
+ spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
+ unmap_domain_page(spl4e);
+ return smfn;
+}
#endif
#if CONFIG_PAGING_LEVELS == 3
return init_bl2(d, gpfn, gmfn);
}
+ if ( SH_GUEST_32PAE &&
+ d->arch.ops->guest_paging_levels == PAGING_L3 )
+ {
+ return init_l3(v, gpfn, gmfn);
+ }
+
if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l3_shadow))) )
{
printk("Couldn't alloc an L3 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
return init_bl2(d, gpfn, gmfn);
}
+ if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
+ {
+ return init_l3(v, gpfn, gmfn);
+ }
+
if ( unlikely(!(smfn = alloc_shadow_page(d, gpfn, gmfn, PGT_l4_shadow))) )
{
printk("Couldn't alloc an L4 shadow for pfn=%lx mfn=%lx\n", gpfn, gmfn);
spl4e = (l4_pgentry_t *)map_domain_page(smfn);
- /* For 32-bit PAE guest on 64-bit host */
- if ( SH_GUEST_32PAE && d->arch.ops->guest_paging_levels == PAGING_L3 )
- {
- unsigned long index;
- /*
- * Shadow L4's pfn_info->tlbflush_timestamp
- * should also save it's own index.
- */
- index = get_cr3_idxval(v);
- frame_table[smfn].tlbflush_timestamp = index;
-
- memset(spl4e, 0, L4_PAGETABLE_ENTRIES*sizeof(l4_pgentry_t));
- /* Map the self entry */
- spl4e[PAE_SHADOW_SELF_ENTRY] = l4e_from_pfn(smfn, __PAGE_HYPERVISOR);
- unmap_domain_page(spl4e);
- return smfn;
- }
-
/* Install hypervisor and 4x linear p.t. mappings. */
if ( (PGT_base_page_table == PGT_l4_page_table) &&
!shadow_mode_external(d) )
* This shadow_mark_va_out_of_sync() is for 2M page shadow
*/
static void shadow_mark_va_out_of_sync_2mp(
- struct vcpu *v, unsigned long gpfn, unsigned long mfn, unsigned long writable_pl1e)
+ struct vcpu *v, unsigned long gpfn, unsigned long mfn, paddr_t writable_pl1e)
{
struct out_of_sync_entry *entry =
shadow_mark_mfn_out_of_sync(v, gpfn, mfn);
}
unmap_domain_page(l1_p);
+ *gl2e_p = gl2e;
return 1;
}
ASSERT( d->arch.ops->guest_paging_levels >= PAGING_L3 );
-#if CONFIG_PAGING_LEVELS >= 4
+#if CONFIG_PAGING_LEVELS >= 3
if ( (error_code & (ERROR_I | ERROR_P)) == (ERROR_I | ERROR_P) )
return 1;
#endif
};
#endif
-#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) ) || \
+#if ( CONFIG_PAGING_LEVELS == 3 && !defined (GUEST_PGENTRY_32) && !defined (GUEST_32PAE) ) || \
( CONFIG_PAGING_LEVELS == 4 && defined (GUEST_PGENTRY_32) )
//
ASSERT( (psh_type == PGT_snapshot) || !mfn_out_of_sync(gmfn) );
- set_shadow_status(d, gpfn, gmfn, smfn, psh_type);
+ set_shadow_status(d, gpfn, gmfn, smfn, psh_type, 0);
if ( pin )
shadow_pin(smfn);
ASSERT( ! IS_INVALID_M2P_ENTRY(gpfn) );
- delete_shadow_status(d, gpfn, gmfn, type);
+ delete_shadow_status(d, gpfn, gmfn, type, 0);
switch ( type )
{
prediction = (prediction & PGT_mfn_mask) | score;
//printk("increase gpfn=%lx pred=%lx create=%d\n", gpfn, prediction, create);
- set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0);
if ( create )
perfc_incr(writable_pte_predictions);
//printk("decrease gpfn=%lx pred=%lx score=%lx\n", gpfn, prediction, score);
if ( score )
- set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred);
+ set_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, prediction, PGT_writable_pred, 0);
else
{
- delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred);
+ delete_shadow_status(d, GPFN_TO_GPTEPAGE(gpfn), 0, PGT_writable_pred, 0);
perfc_decr(writable_pte_predictions);
}
}
* keep an accurate count of writable_pte_predictions to keep it
* happy.
*/
- delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+ delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
perfc_decr(writable_pte_predictions);
}
#define GUEST_32PAE
-#if defined (__x86_64__)
#include "shadow.c"
struct shadow_ops MODE_64_PAE_HANDLER = {
.gva_to_gpa = gva_to_gpa_64,
};
-#endif
#endif
#if CONFIG_PAGING_LEVELS == 3
case 3:
- if ( d->arch.ops != &MODE_64_3_HANDLER )
- d->arch.ops = &MODE_64_3_HANDLER;
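+ /*
+  * Log-dirty mode stays on the generic 3-level handler; otherwise
+  * switch to the PAE handler, flushing any shadows left behind by
+  * the 2-level handler.
+  */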
+ if ( d->arch.ops == NULL ||
+ shadow_mode_log_dirty(d) )
+ {
+ if ( d->arch.ops != &MODE_64_3_HANDLER )
+ d->arch.ops = &MODE_64_3_HANDLER;
+ }
+ else
+ {
+ if ( d->arch.ops == &MODE_64_2_HANDLER )
+ free_shadow_pages(d);
+ if ( d->arch.ops != &MODE_64_PAE_HANDLER )
+ d->arch.ops = &MODE_64_PAE_HANDLER;
+ }
shadow_unlock(d);
return 1;
#endif
put_shadow_ref(entry_get_pfn(ple[i]));
if (d->arch.ops->guest_paging_levels == PAGING_L3)
{
-#if CONFIG_PAGING_LEVELS == 4
+#if CONFIG_PAGING_LEVELS >= 3
if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L4 )
-#elif CONFIG_PAGING_LEVELS == 3
- if ( i == PAE_L3_PAGETABLE_ENTRIES && level == PAGING_L3 )
#endif
break;
}
struct domain *d = page_get_owner(mfn_to_page(gmfn));
unsigned long gpfn = mfn_to_gmfn(d, gmfn);
unsigned long type = page->u.inuse.type_info & PGT_type_mask;
+ u64 index = 0;
SH_VVLOG("%s: free'ing smfn=%lx", __func__, smfn);
if ( !mfn )
gpfn |= (1UL << 63);
}
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
if ( d->arch.ops->guest_paging_levels == PAGING_L3 )
- if ( type == PGT_l4_shadow )
- gpfn = ((unsigned long)page->tlbflush_timestamp << PGT_pae_idx_shift) | gpfn;
+ {
+ if ( type == PGT_l4_shadow )
+ index = page->tlbflush_timestamp;
+ }
#endif
- delete_shadow_status(d, gpfn, gmfn, type);
+ delete_shadow_status(d, gpfn, gmfn, type, index);
switch ( type )
{
while ( count )
{
count--;
- delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred);
+ delete_shadow_status(d, gpfn_list[count], 0, PGT_writable_pred, 0);
}
xfree(gpfn_list);
{
if ( d->arch.shadow_ht[i].gpfn_and_flags != 0 )
{
- printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%lx\n",
- __FILE__, i, d->arch.shadow_ht[i].gpfn_and_flags);
+ printk("%s: d->arch.shadow_ht[%x].gpfn_and_flags=%"PRIx64"\n",
+ __FILE__, i, (u64)d->arch.shadow_ht[i].gpfn_and_flags);
BUG();
}
}
#define PGT_high_mfn_mask (0xfffUL << PGT_high_mfn_shift)
#define PGT_mfn_mask (((1U<<23)-1) | PGT_high_mfn_mask)
#define PGT_high_mfn_nx (0x800UL << PGT_high_mfn_shift)
-#define PGT_pae_idx_shift PGT_high_mfn_shift
#else
/* 23-bit mfn mask for shadow types: good for up to 32GB RAM. */
#define PGT_mfn_mask ((1U<<23)-1)
/* NX for PAE xen is not supported yet */
#define PGT_high_mfn_nx (1ULL << 63)
-#define PGT_pae_idx_shift 23
#endif
#define PGT_score_shift 23
} while (0)
#endif
+#if CONFIG_PAGING_LEVELS >= 3
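+ /*
+  * A PAE guest's PDPT is 32 bytes and may sit at any 32-byte-aligned
+  * offset within its page: CR3 bits [11:5] select one of 128 slots.
+  */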
+static inline u64 get_cr3_idxval(struct vcpu *v)
+{
+ u64 pae_cr3;
+
+ if ( v->domain->arch.ops->guest_paging_levels == PAGING_L3 &&
+ !shadow_mode_log_dirty(v->domain) )
+ {
+ pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
+ return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
+ }
+ else
+ return 0;
+}
+
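+ /* The CR3 index lives in bits [63:32] of the hash key, above gpfn|type. */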
+#define shadow_key_t u64
+#define index_to_key(x) ((x) << 32)
+#else
+#define get_cr3_idxval(v) (0)
+#define shadow_key_t unsigned long
+#define index_to_key(x) (0)
+#endif
+
#define SHADOW_ENCODE_MIN_MAX(_min, _max) ((((GUEST_L1_PAGETABLE_ENTRIES - 1) - (_max)) << 16) | (_min))
#define SHADOW_MIN(_encoded) ((_encoded) & ((1u<<16) - 1))
#define SHADOW_MAX(_encoded) ((GUEST_L1_PAGETABLE_ENTRIES - 1) - ((_encoded) >> 16))
struct shadow_status {
struct shadow_status *next; /* Pull-to-front list per hash bucket. */
- unsigned long gpfn_and_flags; /* Guest pfn plus flags. */
+ shadow_key_t gpfn_and_flags; /* Guest pfn plus flags. */
unsigned long smfn; /* Shadow mfn. */
};
struct domain *d, unsigned long gpfn, unsigned long stype)
{
struct shadow_status *p, *x, *head;
- unsigned long key = gpfn | stype;
+ shadow_key_t key;
+#if CONFIG_PAGING_LEVELS >= 3
+ if ( d->arch.ops->guest_paging_levels == PAGING_L3 &&
+      stype == PGT_l4_shadow )
+ key = gpfn | stype | index_to_key(get_cr3_idxval(current));
+ else
+#endif
+ key = gpfn | stype;
ASSERT(shadow_lock_is_acquired(d));
ASSERT(gpfn == (gpfn & PGT_mfn_mask));
}
static inline void delete_shadow_status(
- struct domain *d, unsigned long gpfn, unsigned long gmfn, unsigned int stype)
+ struct domain *d, unsigned long gpfn, unsigned long gmfn,
+ unsigned int stype, u64 index)
{
struct shadow_status *p, *x, *n, *head;
- unsigned long key = gpfn | stype;
+
+ shadow_key_t key = gpfn | stype | index_to_key(index);
ASSERT(shadow_lock_is_acquired(d));
ASSERT(!(gpfn & ~PGT_mfn_mask));
static inline void set_shadow_status(
struct domain *d, unsigned long gpfn, unsigned long gmfn,
- unsigned long smfn, unsigned long stype)
+ unsigned long smfn, unsigned long stype, u64 index)
{
struct shadow_status *x, *head, *extra;
int i;
- unsigned long key = gpfn | stype;
+
+ shadow_key_t key = gpfn | stype | index_to_key(index);
SH_VVLOG("set gpfn=%lx gmfn=%lx smfn=%lx t=%lx", gpfn, gmfn, smfn, stype);
*/
extern struct shadow_ops MODE_64_2_HANDLER;
extern struct shadow_ops MODE_64_3_HANDLER;
+extern struct shadow_ops MODE_64_PAE_HANDLER;
#if CONFIG_PAGING_LEVELS == 4
extern struct shadow_ops MODE_64_4_HANDLER;
-extern struct shadow_ops MODE_64_PAE_HANDLER;
#endif
#if CONFIG_PAGING_LEVELS == 3
#define ESH_LOG(_f, _a...) ((void)0)
#endif
-#define PAGING_L4 4UL
-#define PAGING_L3 3UL
-#define PAGING_L2 2UL
-#define PAGING_L1 1UL
#define L_MASK 0xff
#define PAE_PAGING_LEVELS 3
#define entry_has_changed(x,y,flags) \
( !!(((x).lo ^ (y).lo) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags(flags))) )
-#define PAE_SHADOW_SELF_ENTRY 259
-#define PAE_L3_PAGETABLE_ENTRIES 4
-
/******************************************************************************/
/*
- * The macro and inlines are for 32-bit PAE guest on 64-bit host
+ * The macro and inlines are for 32-bit PAE guest
*/
-#define PAE_CR3_ALIGN 5
-#define PAE_CR3_IDX_MASK 0x7f
-#define PAE_CR3_IDX_NO 128
-
+#define PAE_PDPT_RESERVED 0x1e6 /* [8:5], [2:1] */
+#define PAE_SHADOW_SELF_ENTRY 259
+#define PAE_L3_PAGETABLE_ENTRIES 4
+
/******************************************************************************/
static inline int table_offset_64(unsigned long va, int level)
{
}
}
-static inline unsigned long get_cr3_idxval(struct vcpu *v)
-{
- unsigned long pae_cr3 = hvm_get_guest_ctrl_reg(v, 3); /* get CR3 */
-
- return (pae_cr3 >> PAE_CR3_ALIGN) & PAE_CR3_IDX_MASK;
-}
-
-
#define SH_GUEST_32PAE 1
#else
#define guest_table_offset_64(va, level, index) \
table_offset_64((va),(level))
-#define get_cr3_idxval(v) 0
#define SH_GUEST_32PAE 0
#endif
l1_p =(pgentry_64_t *)map_domain_page(smfn);
for (i = 0; i < L1_PAGETABLE_ENTRIES; i++)
- entry_remove_flags(l1_p[i], _PAGE_RW);
+ {
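+ /* Only write-protect entries that map guest page tables. */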
+ if ( mfn_is_page_table(entry_get_pfn(l1_p[i])) )
+ entry_remove_flags(l1_p[i], _PAGE_RW);
+ }
unmap_domain_page(l1_p);
}
#ifndef _XEN_SHADOW_OPS_H
#define _XEN_SHADOW_OPS_H
+#define PAGING_L4 4UL
+#define PAGING_L3 3UL
+#define PAGING_L2 2UL
+#define PAGING_L1 1UL
+
+#define PAE_CR3_ALIGN 5
+#define PAE_CR3_IDX_MASK 0x7f
+
#if defined( GUEST_PGENTRY_32 )
#define GUEST_L1_PAGETABLE_ENTRIES L1_PAGETABLE_ENTRIES_32